### File    : corstange-replication.r
##  Author  : Daniel Corstange (daniel.corstange@gmail.com)
##  Created : Thu 16 Jul 2015 --- 09:22 AM
##  Changed : Tue 28 Jul 2015 --- 03:05 PM
##  Notes   : data analysis for "Anti-American Behavior in the
##            Middle East"

### core
#### preliminaries: load and process data
##### helper functions
###### strip out 'nuisance' answer categories

##   cat.string expects the label of the category
##   cat.number expects the numeric code
strip.junk <- function(x, cat.string, cat.number) {
  if (is.factor(x) == TRUE) {
    x    <- x[, drop = TRUE]
    lvls <- levels(x)
    lvls <- lvls[-which(lvls == cat.string)]
    if (length(lvls) > 0) {
      x    <- ifelse(x == cat.string, NA, x)
      x    <- factor(x = x, labels = lvls)
    }
  } else if (is.numeric(x) == TRUE)
    x <- ifelse(x == cat.number, NA, x)
  return(x)
}

###### clean a raw variable and return the substantive answer

clean <- function(x) {
  strip.junk(strip.junk(strip.junk(x,
                                   "No answer", -99),
                        "Don't Know", -98),
             "Not Applicable", 99)
}

##### read data from SPSS file

library(foreign)
SPSS <- read.spss("corstange-data.sav")

##### process data

## let's use cleaned data and memorable variable names
Leb <- data.frame(interviewer      = SPSS$Research,
                  neigh            = SPSS$Q2,
                  district         = SPSS$Q2a,
                  female           = SPSS$Q3,
                  treatment        = SPSS$Q4,
                  participate      = SPSS$Q5,
                  age              = clean(SPSS$Q6),
                  educ             = clean(SPSS$Q7),
                  income           = clean(SPSS$Q8),
                  elec             = clean(SPSS$Q9),
                  sect.all         = clean(SPSS$Q10),
                  trust            = clean(SPSS$Q11),
                  trust.sect       = clean(SPSS$Q12),
                  trust.foreign    = clean(SPSS$Q13),
                  know.amend       = clean(SPSS$Q14a),
                  know.dist        = clean(SPSS$Q14b),
                  know.sect        = clean(SPSS$Q14c),
                  interest         = clean(SPSS$Q15),
                  party            = clean(SPSS$Q16),
                  foreign.trump    = clean(SPSS$Q17),
                  disarm           = clean(SPSS$Q18),
                  jumblatt         = clean(SPSS$Q19),
                  new.govt         = clean(SPSS$Q20),
                  stl.abolish      = clean(SPSS$Q21),
                  stl.neutral      = clean(SPSS$Q22),
                  stl.mar14        = clean(SPSS$Q23a),
                  stl.mar08        = clean(SPSS$Q23b),
                  stl.un           = clean(SPSS$Q23c),
                  stl.japan        = clean(SPSS$Q23d),
                  stl.us           = clean(SPSS$Q23e),
                  stl.israel       = clean(SPSS$Q23f),
                  stl.iran         = clean(SPSS$Q23g),
                  syria            = clean(SPSS$Q24),
                  reconsider       = clean(SPSS$Q25),
                  gatekeeper       = clean(SPSS$Q26),
                  private          = clean(SPSS$Q27),
                  age.guess        = clean(SPSS$Q28),
                  income.guess     = clean(SPSS$Q29),
                  neigh.type       = clean(SPSS$Q30))

## let's clean up a few things
Leb$neigh <- factor(Leb$neigh)     # remove stray empty factor
Leb$syria <-                       # clean stray 0
    factor(x      = ifelse(Leb$syria == 0, NA, Leb$syria),
           labels = c("Strongly Protestors",
                      "Somewhat Protestors",
                      "Neither",
                      "Somewhat Government",
                      "Strongly Government"))
Leb$age   <- ifelse(Leb$age == 101, NA, Leb$age) # 101 is code for NA
Leb$educ4 <-                       # combine illiterate and primary
  factor(x      = with(Leb, ifelse(educ == "Illiterate", 2, educ)),
         labels = c("Primary", "Secondary", "BA", "MA"))
Leb$educ3 <-                       # trivariate version, scaled 0-1
  with(Leb,
       (ifelse(educ == "Illiterate", 2,
               ifelse(educ == "Master's Degree or Higher",
                      4, educ))) - 2) / 2

Leb$inc <- (as.integer(Leb$income) - 1) / 6 # scaled 0-1

Leb$treat <-                                # relevel and abbreviate
    with(Leb,
         factor(x      = treatment,
                levels = levels(treatment)[c(1,4,5,6,3,2)],
                labels = c("ctrl", "umd", "aub",
                           "aub.umd", "can", "us")))

Leb$know <-                                 # scaled 0-1
  with(Leb,
       apply(cbind(ifelse(know.amend == "Two-Thirds Majority", 1, 0),
                   ifelse(know.dist  == 26, 1, 0),
                   ifelse(know.sect  == 18, 1, 0)),
             1, sum, na.rm = TRUE)) / 3

Leb$participate <- ifelse(Leb$participate == "Refused", 0, 1)
Leb$gatekeeper <- ifelse(Leb$gatekeeper == "Yes", 1, 0)

Leb$sect.main <-                # convenience: just the 3 big ones
  with(Leb,
       factor(x = ifelse(sect.all == "Sunni", 1,
                         ifelse(sect.all == "Shiite", 2,
                                ifelse(!(sect.all %in% c("Druze", "Alaoui")),
                                       3, NA))),
              labels = c("Sunni", "Shia", "Christian")))

Leb$sect4 <-                    # Christian, Druze, Shia, Sunni
  with(Leb,
       factor(x = ifelse(sect.all == "Sunni", 4,
                         ifelse(sect.all == "Shiite", 3,
                                ifelse(sect.all == "Druze", 2,
                                       ifelse(sect.all != "Alaoui", 1, NA)))),
              labels = c("Christian", "Druze", "Shia", "Sunni")))

Leb$neigh.3types <-
    with(Leb,
         factor(x      = ifelse(neigh.type %in% c("Christian",
                                                  "Sunni",
                                                  "Druze"), 3,
                                ifelse(neigh.type == "Shia", 2, 1)),
                labels = c("Mixed", "Shia", "Non-Shia")))

Leb$mar08.all <-
    with(Leb,
         ifelse(is.na(party), NA,
                ifelse(party %in% c("March 8 Forces",
                                    "Amal Movement/ Nabih Berri",
                                    "Hizballah/ Hassan Nasrallah",
                                    "Free Patriotic Movement/ Michel Aoun",
                                    "Tashnaq",
                                    "Syrian Social Nationalist Party / SSNP",
                                    "Lebanese Communist Party",
                                    "El Marada Movement / Sleiman Frangieh",
                                    "People Movement / Najah Wakim",
                                    "Islamic Charity Projects Association",
                                    "Al Mourabetoon",
                                    " Arab Socialist Ba'ath Party",
                                    "Union Party / Abdul Rahim Murad",
                                    "Weam Wahhab",
                                    "Al Waed Party / Elie Hobeika" ,
                                    "Lebanese Democratic Party / Prince Talal Arslan"), 1, 0)))

Leb$mar14.all <-
    with(Leb,
         ifelse(is.na(party), NA,
                ifelse(party %in% c("March 14 Forces",
                                    "Future Movement/ Saad Hariri",
                                    "Lebanese Forces/ Samir Geagea",
                                    "Kataib/ Amin Gemayel",
                                    "Al-Jamaa al-Islamiya",
                                    "Social Democrat Hunchakian Party / SDHP"),
                       1, 0)))

## policy preferences (bigger numbers: more anti-American)
Leb$policies <-
    with(Leb, apply(cbind(disarm,
                          stl.abolish,
                          stl.neutral,
                          syria),
                    1, mean))

## policies dichotomized by control group median
Leb$policies.anti <-
    ifelse(Leb$policies < median(subset(Leb,
                                        subset = treat == "ctrl",
                                        select = policies,
                                        drop   = TRUE),
                                 na.rm = TRUE), 0, 1)

## anti-American index: above median on policies, March 08 supporter
Leb$antiamer <-
    with(Leb, ifelse(policies.anti & mar08.all, 1, 0))

#### helper functions
##### bootstrap (for participation rates)

bootme <- function(x, by, reps = 1000, use.median = FALSE) {
  Dat <- data.frame(x = x, by = as.integer(by))
  nby <- length(unique(by))
  out <- matrix(0, nrow = reps, ncol = nby)
  colnames(out) <- levels(by)
  for (b in 1:nby) {
    Sub <- subset(Dat, by == b)
    for (r in 1:reps) {
      this.sub <- Sub[sample(1:nrow(Sub), nrow(Sub), replace = T),]
      if (use.median == TRUE) {
        out[r,b] <- median(this.sub$x, na.rm = T)
      } else {
        out[r,b] <- mean(this.sub$x, na.rm = T)
      }
    }
  }
  return(out)
}
  
##### bootstrap confidence intervals

bootci <- function(x, p1 = .05, p2 = .10) {
    apply(x, 2, quantile, probs = c(p1/2, p2/2, .50, 1-p2/2, 1-p1/2))
}

##### bootstrap difference in means

## takes an object returned by the <bootme> function
boot.dim <- function(x, sig = 3) {
  ntr <- ncol(x)
  m <- s <- matrix(NA, nrow = ntr, ncol = ntr)
  rownames(m) <- colnames(m) <- colnames(x)
  for (i in 1:(ntr-1)) {
    for (j in (i+1):ntr) {
      m[i,j] <- mean(x[,i] - x[,j])
      s[i,j] <- sd(x[,i] - x[,j])
    }
  }
  return(lapply(list(dim  = t(abs(m)),
                     tval = t(abs(m/s)),
                     pval = t(pnorm(-abs(m/s)) * 2)),
                round, sig))
}

##### condition-wise chi-squared tests

chisq.me <- function(dv, iv, sig = 3) {
  ntr <- length(levels(iv))
  x <- p <- matrix(NA, nrow = ntr, ncol = ntr)
  rownames(x) <- colnames(x) <- 
    rownames(p) <- colnames(p) <- 
      levels(iv)
  for (i in 1:(ntr-1)) {
    for (j in (i+1):ntr) {
      test   <- chisq.test(table(iv, dv)[c(i,j),])
      x[i,j] <- test$statistic
      p[i,j] <- test$p.value
    }
  }
  return(lapply(list(chisq = t(x), pval = t(p)), round, sig))
}

#### participation/refusal
##### calculate
###### bootstrap refusal rates by condition

## full sample
boot.refuse.all <-
    with(Leb,
         bootme(1-participate, treat))

## Shia neighborhoods
boot.refuse.shia <-
    with(subset(Leb, subset = Leb$neigh.3types == "Shia"),
         bootme(1-participate, treat))

## non-Shia neighborhoods
boot.refuse.nonshia <-
    with(subset(Leb, subset = Leb$neigh.3types == "Non-Shia"),
         bootme(1-participate, treat))

###### bootstrap refusal rates, pooling control and universities 

## full sample, pooling control and university conditions
boot.refuse.all.pooled <-
    with(Leb,
         bootme(1-participate,
                factor(x      = ifelse(treat == "us", 3,
                                       ifelse(treat == "can", 2, 1)),
                       labels = c("ctrl.univs", "can", "us"))))

## Shia neighborhoods, pooling control and university conditions
boot.refuse.shia.pooled <-
    with(subset(Leb, subset = Leb$neigh.3types == "Shia"),
         bootme(1-participate,
                factor(x      = ifelse(treat == "us", 3,
                                       ifelse(treat == "can", 2, 1)),
                       labels = c("ctrl.univs", "can", "us"))))

## non-Shia neighborhoods, pooling control and university conditions
boot.refuse.nonshia.pooled <-
    with(subset(Leb, subset = Leb$neigh.3types == "Non-Shia"),
         bootme(1-participate,
                factor(x      = ifelse(treat == "us", 3,
                                       ifelse(treat == "can", 2, 1)),
                       labels = c("ctrl.univs", "can", "us"))))

###### difference in means between conditions

dim.refuse.all     <- boot.dim(boot.refuse.all)
dim.refuse.shia    <- boot.dim(boot.refuse.shia)
dim.refuse.nonshia <- boot.dim(boot.refuse.nonshia)

###### summarize refusal rates and confidence intervals

## refusals by condition
sum.refuse.all     <- bootci(boot.refuse.all)
sum.refuse.shia    <- bootci(boot.refuse.shia)
sum.refuse.nonshia <- bootci(boot.refuse.nonshia)

## refusals by condition, pooling control and university conditions
sum.refuse.all.pooled     <- bootci(boot.refuse.all.pooled)
sum.refuse.shia.pooled    <- bootci(boot.refuse.shia.pooled)
sum.refuse.nonshia.pooled <- bootci(boot.refuse.nonshia.pooled)

###### summarize differences and ratios between select conditions

## Shia neighborhoods: US vs. control, UMD, and Canada
sum.diff.shia <- 
    with(as.data.frame(boot.refuse.shia),
         apply(cbind(us.v.ctrl = us - ctrl,
                     us.v.umd  = us - umd,
                     us.v.can  = us - can),
               2, quantile,
               probs = c(.025, .050, .500, .950, .975)))
sum.rat.shia <- 
    with(as.data.frame(boot.refuse.shia),
         apply(cbind(us.v.ctrl = us / ctrl,
                     us.v.umd  = us / umd,
                     us.v.can  = us / can),
               2, quantile,
               probs = c(.025, .050, .500, .950, .975)))

## non-Shia neighborhoods: US vs. control, UMD, and Canada
sum.diff.nonshia <- 
    with(as.data.frame(boot.refuse.nonshia),
         apply(cbind(us.v.ctrl = us - ctrl,
                     us.v.umd  = us - umd,
                     us.v.can  = us - can),
               2, quantile,
               probs = c(.025, .050, .500, .950, .975)))
sum.rat.nonshia <- 
    with(as.data.frame(boot.refuse.nonshia),
         apply(cbind(us.v.ctrl = us / ctrl,
                     us.v.umd  = us / umd,
                     us.v.can  = us / can),
               2, quantile,
               probs = c(.025, .050, .500, .950, .975)))

###### refusers unsolicited offers after debriefing

boot.offer <-
  with(subset(Leb, subset = !participate),
       bootme(reconsider == "Yes",
              treat))

## most interested in pooled universities vs pooled embassies
boot.offer.alt <-
  with(subset(Leb, subset = !participate),
       bootme(reconsider == "Yes",
              factor(x = ifelse(treat %in% c("umd", "aub", "aub.umd"), 2,
                                ifelse(treat %in% c("can", "us"), 3, 1)),
                     labels = c("ctrl", "univs", "embs"))))

## summarize
sum.offer     <- bootci(boot.offer)
sum.offer.alt <- bootci(boot.offer.alt)

## differences in means
dim.offer     <- boot.dim(boot.offer)
dim.offer.alt <- boot.dim(boot.offer.alt)

##### tell me the results
###### refusals by condition

## refusals by condition
sum.refuse.all                          # full sample
sum.refuse.shia                         # Shia neighborhoods
sum.refuse.nonshia                      # non-Shia neighborhoods

## refusals by condition, pooling control and university conditions
sum.refuse.all.pooled                   # full sample
sum.refuse.shia.pooled                  # Shia neighborhoods
sum.refuse.nonshia.pooled               # non-Shia neighborhoods

###### condition-wise difference in means

## full sample
dim.refuse.all$dim
dim.refuse.all$pval

## Shia neighborhoods
dim.refuse.shia$dim
dim.refuse.shia$pval

## non-Shia neighborhoods
dim.refuse.nonshia$dim
dim.refuse.nonshia$pval

###### selected difference and ratios

## refusals, selected differences between conditions
sum.diff.shia                           # Shia neighborhoods
sum.diff.nonshia                        # non-Shia neighborhoods

## refusals, selected ratios between conditions
sum.rat.shia                            # Shia neighborhoods
sum.rat.nonshia                         # non-Shia neighborhoods

###### post-debriefing offers to participate

## summarized by condition
sum.offer                      # condition-wise
sum.offer.alt                  # pooled universities and embassies

## differences in means
dim.offer                      # condition-wise
dim.offer.alt                  # pooled universities and embassies

#### anti-Americanism
##### calculate
###### March 8 identification

boot.mar08 <- with(Leb, bootme(mar08.all, treat))
sum.mar08  <- bootci(boot.mar08)
dim.mar08  <- boot.dim(boot.mar08)

chi.mar08        <- with(Leb, chisq.me(mar08.all, treat))
chi.mar08.pooled <-
    with(Leb,
         chisq.me(mar08.all,
                  factor(x      = ifelse(treat == "us", 3,
                                         ifelse(treat == "can", 2, 1)),
                         labels = c("ctrl.univs", "can", "us"))))

###### policy preferences

boot.policies <- with(Leb, bootme(policies.anti, treat))
sum.policies  <- bootci(boot.policies)
dim.policies  <- boot.dim(boot.policies)

chi.policies        <- with(Leb, chisq.me(policies.anti, treat))
chi.policies.pooled <-
    with(Leb,
         chisq.me(policies.anti,
                  factor(x      = ifelse(treat == "us", 3,
                                         ifelse(treat == "can", 2, 1)),
                         labels = c("ctrl.univs", "can", "us"))))

###### anti-American index

boot.antiamer <- with(Leb, bootme(antiamer, treat))
sum.antiamer  <- bootci(boot.antiamer)
dim.antiamer  <- boot.dim(boot.antiamer)

chi.antiamer        <- with(Leb, chisq.me(antiamer, treat))
chi.antiamer.pooled <-
    with(Leb,
         chisq.me(antiamer,
                  factor(x      = ifelse(treat == "us", 3,
                                         ifelse(treat == "can", 2, 1)),
                         labels = c("ctrl.univs", "can", "us"))))

##### tell me the results
###### March 8 identification

sum.mar08                     # summary by condition
dim.mar08                     # condition-wise difference in means
chi.mar08                     # condition-wise chi-squared
chi.mar08.pooled              # condition-wise chi-squared

###### policy preferences

sum.policies                  # summary by condition
dim.policies                  # condition-wise difference in means
chi.policies                  # condition-wise chi-squared
chi.policies.pooled           # condition-wise chi-squared

###### anti-American index

sum.antiamer                  # summary by condition
dim.antiamer                  # condition-wise difference in means
chi.antiamer                  # condition-wise chi-squared
chi.antiamer.pooled           # condition-wise chi-squared

#### appendix items
##### balance checks
###### pre-treatment: imbalance by enumerator?

## p-values from chisq tests
enumerator.pval <- 
  with(Leb,
       apply(table(treat, as.integer(interviewer)),
             2, function(x) chisq.test(x)$p.value))

## approximations may be problematic for a few low-N enumerators
enumerator.pval.boot <-
  with(Leb,
       apply(table(treat, as.integer(interviewer)),
             2, function(x) chisq.test(x,
                                       simulate.p.value = TRUE,
                                       B = 1000)$p.value))

## 6% of enumerators are p <= .05 (what you'd expect by chance)
table(enumerator.pval <= .05)           # 4 of 65 
table(enumerator.pval.boot <= .05)      # 4 of 65

###### pre-treatment: imbalance by sex or neighborhood type?

## no imbalance by sex (p = .14)
with(Leb, chisq.test(table(treat, female)))

## no imbalance by neighborhood type (p = .33)
with(Leb, chisq.test(table(treat, neigh.3types)))

## how about a multinomial logit?
library(nnet)

## female + neighborhood types
mod.balance1 <-
  multinom(treat ~ female + neigh.3types,
           data = Leb)
summary(mod.balance1)

## female + 17 individual neighborhoods
mod.balance2 <-
  multinom(treat ~ female + neigh,
           data = Leb)
summary(mod.balance2)
## p = .04 on UMD and Canada for Choueifat
## but there's (17-1) * 5 = 80 neighborhood coefficients (2.5% p <= .05)

###### post-treatment: imbalance by socioeconomics?

mod.post.treatment <- 
  multinom(treat
           ~ female
           + age
           + educ3
           + know
           + inc
           + log(elec)
           ,
           data = Leb)
summary(mod.post.treatment)

##### appendix models
###### March 08 identification

mod.mar08.biv <-
    glm(mar08.all
        ~ treat
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.mar08.biv)

mod.mar08.control <-
    glm(mar08.all
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.mar08.control)

mod.mar08.extra1 <-
    glm(mar08.all
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        + neigh.3types
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.mar08.extra1)

mod.mar08.extra2 <-
    glm(mar08.all
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        + neigh.3types
        + sect4
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.mar08.extra2)

###### policy index

mod.ind.biv <-
    glm(policies.anti
        ~ treat
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.ind.biv)

mod.ind.control <-
    glm(policies.anti
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.ind.control)

mod.ind.extra1 <-
    glm(policies.anti
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        + neigh.3types
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.ind.extra1)

mod.ind.extra2 <-
    glm(policies.anti
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        + neigh.3types
        + sect4
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.ind.extra2)

###### march 08 + policy index jointly

mod.aa.biv <-
    glm(antiamer
        ~ treat
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.aa.biv)

mod.aa.control <-
    glm(antiamer
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.aa.control)

mod.aa.extra1 <-
    glm(antiamer
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        + neigh.3types
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.aa.extra1)

mod.aa.extra2 <-
    glm(antiamer
        ~ treat
        + female
        + age
        + educ3
        + know
        + inc
        + log(elec)
        + neigh.3types
        + sect4
        ,
        family = binomial(link = "probit"),
        data   = Leb)
summary(mod.aa.extra2)

#### misc
##### proportion of sect in a given neighborhood type

## 98% of residents of Shia neighborhoods are Shia
with(subset(Leb, subset = neigh.type == "Shia"),
     prop.table(table(sect4 == "Shia")))

## 98% of residents of Sunni neighborhoods are Sunni
with(subset(Leb, subset = neigh.type == "Sunni"),
     prop.table(table(sect4 == "Sunni")))

## 98% of residents of Christian neighborhoods are Christian
with(subset(Leb, subset = neigh.type == "Christian"),
     prop.table(table(sect4 == "Christian")))

## 95% of residents of Druze neighborhoods are Druze
with(subset(Leb, subset = neigh.type == "Druze"),
     prop.table(table(sect4 == "Druze")))

##### summary statistics, bloc ID and policy preferences (control group)

## control group: political bloc identification
sumstats.bloc <-
  with(subset(Leb, subset = treat == "ctrl"),
       as.data.frame(cbind(mar14 = tapply(mar14.all, sect.main,
                                          mean, na.rm = TRUE),
                           mar08 = tapply(mar08.all, sect.main,
                                          mean, na.rm = TRUE))))

## control group: policy preferences (reversed to match figure)
sumstats.policies <-
  with(subset(Leb, subset = treat == "ctrl"),
       as.data.frame(cbind(Q1     = tapply(6 - policies, sect.main,
                                           quantile, probs = .25,
                                           na.rm = TRUE),
                           Median = tapply(6 - policies, sect.main,
                                           median, na.rm = TRUE),
                           Q3     = tapply(6 - policies, sect.main,
                                           quantile, probs = .75,
                                           na.rm = TRUE))))

## tell me the results
sumstats.bloc
sumstats.policies

##### policy scale summary statistics

library(psy)
library(psych)

## Cronbach's alpha (control group)
with(subset(Leb, subset = treat == "ctrl"),
     cronbach(cbind(disarm,
                    stl.abolish,
                    stl.neutral,
                    syria)))

## correlations
policy.corr <- with(subset(Leb, subset = treat == "ctrl"),
                    cor(cbind(disarm,
                              stl.abolish,
                              stl.neutral,
                              syria),
                        use = "pairwise.complete.obs"))

## range, median, and mean correlation (with Fisher transformation)
range(policy.corr[lower.tri(policy.corr)])
median(policy.corr[lower.tri(policy.corr)])
fisherz2r(mean(fisherz(policy.corr[lower.tri(policy.corr)])))

##### trust by condition
###### full sample

## dichotomized as in main text: strongly agree (very trusting) 
with(Leb,                               # generalized
     chisq.test(table(treat,
                      as.integer(trust) == 5)))$p.value
with(Leb,                               # cosectarians
     chisq.test(table(treat,
                      as.integer(trust.sect) == 5)))$p.value
with(Leb,                               # foreigners
     chisq.test(table(treat,
                      as.integer(trust.foreign) == 5)))$p.value

## raw data (full 5-point scale)
with(Leb,                               # generalized
     chisq.test(table(treat,
                      trust)))$p.value
with(Leb,                               # cosectarians
     chisq.test(table(treat,
                      trust.sect)))$p.value
with(Leb,                               # foreigners
     chisq.test(table(treat,
                      trust.foreign)))$p.value

###### Shia subsample

## dichotomized as in main text: strongly agree (very trusting) 
with(subset(Leb, subset = sect4 == "Shia"), # generalized
     chisq.test(table(treat,
                      as.integer(trust) == 5),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect4 == "Shia"), # cosectarians
     chisq.test(table(treat,
                      as.integer(trust.sect) == 5),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect4 == "Shia"), # foreigners
     chisq.test(table(treat,
                      as.integer(trust.foreign) == 5),
                simulate.p.value = TRUE,
                B = 1000))$p.value

## raw data (full 5-point scale)
with(subset(Leb, subset = sect4 == "Shia"), # generalized
     chisq.test(table(treat,
                      trust),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect4 == "Shia"), # cosectarians
     chisq.test(table(treat,
                      trust.sect),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect4 == "Shia"), # foreigners
     chisq.test(table(treat,
                      trust.foreign),
                simulate.p.value = TRUE,
                B = 1000))$p.value

###### non-Shia subsample

## dichotomized as in main text: strongly agree (very trusting) 
with(subset(Leb, subset = sect.all != "Shiite"), # generalized
     chisq.test(table(treat,
                      as.integer(trust) == 5),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect.all != "Shiite"), # cosectarians
     chisq.test(table(treat,
                      as.integer(trust.sect) == 5),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect.all != "Shiite"), # foreigners
     chisq.test(table(treat,
                      as.integer(trust.foreign) == 5),
                simulate.p.value = TRUE,
                B = 1000))$p.value

## raw data (full 5-point scale)
with(subset(Leb, subset = sect.all != "Shiite"), # generalized
     chisq.test(table(treat,
                      trust),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect.all != "Shiite"), # cosectarians
     chisq.test(table(treat,
                      trust.sect),
                simulate.p.value = TRUE,
                B = 1000))$p.value
with(subset(Leb, subset = sect.all != "Shiite"), # foreigners
     chisq.test(table(treat,
                      trust.foreign),
                simulate.p.value = TRUE,
                B = 1000))$p.value

### Pew
#### preliminaries: load and process data
##### read data from SPSS file

## You'll need the Spring 2011 Dataset.  As of July 2015, the URL is:
## http://www.pewglobal.org/category/datasets/2011/?download=19586

require(foreign)
Pew <- read.spss("Pew Global Attitudes Spring 2011 Dataset for web.sav")
Pew <- as.data.frame(Pew) 

##### process data (full sample)

library(plyr)
library(psych)

## combine Pakistani samples, shorten Palestine label
Pew$country2 <- Pew$COUNTRY
Pew$country2 <- revalue(Pew$country2,
                        c("Pakistan refield"        = "Pakistan",
                          "Palestinian Territories" = "Palestine"))

## US and American favorability, low to high
Pew$us   <- with(Pew, ifelse(as.integer(Q3A) <= 4, 5 - as.integer(Q3A), NA))
Pew$amer <- with(Pew, ifelse(as.integer(Q3B) <= 4, 5 - as.integer(Q3B), NA))

## dichotomized versions
Pew$us.bi   <- ifelse(Pew$us   <= 2, 0, 1)
Pew$amer.bi <- ifelse(Pew$amer <= 2, 0, 1)

## mean anti-Americanism by country (dichotomized versions)
## mean US-American correlations by country (4-point versions)
## wgt inverse to sample size
AA <- data.frame(country = levels(Pew$country2),
                 wgt     = as.vector(1/table(Pew$country2)),
                 us.bi   = ddply(Pew, "country2",
                                 function(x) weighted.mean(x$us.bi,
                                                           x$WEIGHT,
                                                           na.rm = TRUE))[,2],
                 amer.bi = ddply(Pew, "country2",
                                 function(x) weighted.mean(x$amer.bi,
                                                           x$WEIGHT,
                                                           na.rm = TRUE))[,2],
                 corr    = ddply(Pew, "country2",
                                  function(x) {
                                    dat <- na.omit(cbind(x$us,
                                                         x$amer,
                                                         x$WEIGHT))
                                    cov.wt(dat[,1:2],
                                           dat[,3],
                                           cor = TRUE)$cor[1,2]
                                  })[,2])

##### process data (Lebanon only)

## just the Lebanon data
Pew.L <- subset(Pew, subset = Pew$COUNTRY == "Lebanon")

## gather data on sectarian composition
Pew.L$sect <-
  with(Pew.L,
       factor(x = ifelse(Q34LEB == "Christianity", 1,
                         ifelse(Q34LEB == "Druze", 4,
                                ifelse(as.integer(Q113) == 1, 2,
                                       ifelse(as.integer(Q113) == 2, 3,
                                              NA)))),
              labels = c("Christian", "Sunni", "Shia", "Druze")))
## simplify by dropping Druze
Pew.L$sect3 <-
  with(Pew.L,
       factor(x = ifelse(sect == "Druze", NA, sect),
              labels = c("Christian", "Sunni", "Shia")))

## mean anti-Americanism by sect (dichotomized versions)
## mean US-American correlations by sect (4-point versions)
AA.L <- data.frame(sect    = levels(Pew.L$sect3),
                   us.bi   = na.omit(ddply(Pew.L, "sect3",
                                           function(x) {
                                             weighted.mean(x$us.bi,
                                                           x$WEIGHT,
                                                           na.rm = TRUE)
                                             }))[,2],
                   amer.bi = na.omit(ddply(Pew.L, "sect3",
                                           function(x) {
                                             weighted.mean(x$amer.bi,
                                                           x$WEIGHT,
                                                           na.rm = TRUE)
                                             }))[,2],
                   corr    = na.omit(ddply(Pew.L, "sect3",
                                           function(x) {
                                             dat <- na.omit(cbind(x$us,
                                                                  x$amer,
                                                                  x$WEIGHT))
                                             cov.wt(dat[,1:2],
                                                    dat[,3],
                                                    cor = TRUE)$cor[1,2]
                                           }))[,2])

#### findings

## US and American favorability, and correlation, by country
AA[order(-AA$us.bi), c("country", "us.bi")]
AA[order(-AA$amer.bi), c("country", "amer.bi")]
AA[order(-AA$corr), c("country", "corr")]

## US-American correlation: country-level weighted mean and range
with(AA, fisherz2r(weighted.mean(fisherz(corr), wgt)))
range(AA$corr)

## Lebanese results
AA.L

### BBC (appendix figure)
#### preliminaries: load and process data
##### read data

## data sources listed in article's online appendix

BBC09 <- read.csv("bbc-2009.csv")
BBC10 <- read.csv("bbc-2010.csv")
BBC11 <- read.csv("bbc-2011.csv")
BBC12 <- read.csv("bbc-2012.csv")
BBC13 <- read.csv("bbc-2013.csv")

##### gather the positive and negative ratings, and differences

## the merge function makes my head hurt; I bet there's a more
## elegant way to do the following

## positives
POS        <- merge(BBC09[,1:2], BBC10[,1:2], by = "country")
POS        <- merge(POS, BBC11[,1:2], by = "country")
POS        <- merge(POS, BBC12[,1:2], by = "country")
POS        <- merge(POS, BBC13[,1:2], by = "country")
names(POS) <- c("country", "pos09", "pos10", "pos11", "pos12", "pos13")
POS$avg    <- apply(POS[,-1], 1, mean, na.rm = T)

## negatives
NEG        <- merge(BBC09[,c(1,3)], BBC10[,c(1,3)], by = "country")
NEG        <- merge(NEG, BBC11[,c(1,3)], by = "country")
NEG        <- merge(NEG, BBC12[,c(1,3)], by = "country")
NEG        <- merge(NEG, BBC13[,c(1,3)], by = "country")
names(NEG) <- c("country", "neg09", "neg10", "neg11", "neg12", "neg13")
NEG$avg    <- apply(NEG[,-1], 1, mean, na.rm = T)

## differences
DIF        <- merge(BBC09[,c(1,4)], BBC10[,c(1,4)], by = "country")
DIF        <- merge(DIF, BBC11[,c(1,4)], by = "country")
DIF        <- merge(DIF, BBC12[,c(1,4)], by = "country")
DIF        <- merge(DIF, BBC13[,c(1,4)], by = "country")
names(DIF) <- c("country", "dif09", "dif10", "dif11", "dif12", "dif13")
DIF$avg    <- apply(DIF[,-1], 1, mean, na.rm = T)

##### gather the 5-year averages

AVG <- data.frame(country    = POS$country,
                  positive   = POS$avg,
                  negative   = NEG$avg,
                  difference = DIF$avg)

#### show me the summary

AVG

#### show me some simple dotplots

library(lattice)

## by differences
AVG$country <- reorder(AVG$country, AVG$difference)
dotplot(country ~ difference, data = AVG)

## by positives
AVG$country <- reorder(AVG$country, AVG$positive)
dotplot(country ~ positive, data = AVG)

## by negatives
AVG$country <- reorder(AVG$country, AVG$negative)
dotplot(country ~ negative, data = AVG)

### (local variables)

## Local Variables:
## eval: (outline-minor-mode 1)
## eval: (font-lock-mode 1)
## outline-regexp: "^\###+ "
## outline-level: (lambda ()
##                  (cond
##                   ((looking-at "^### ")     1)
##                   ((looking-at "^#### ")    2)
##                   ((looking-at "^##### ")   3)
##                   ((looking-at "^###### ")  4)
##                   ((looking-at "^####### ") 5)
##                   (t 1000)))
## eval: (font-lock-add-keywords
##        nil
##        '(("^### .*" 0 (aref outline-font-lock-faces 0) t)
##          ("^#### .*" 0 (aref outline-font-lock-faces 1) t)
##          ("^##### .*" 0 (aref outline-font-lock-faces 2) t)
##          ("^###### .*" 0 (aref outline-font-lock-faces 3) t)
##          ("^####### .*" 0 (aref outline-font-lock-faces 4) t)))
## eval: (hide-sublevels 1)
## End:
